{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# NLTK使用"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import nltk"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nltk.download()  # 打开NLTK交互式下载器，从中选择并下载所需的数据集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nltk.book import *  # 导入book模块的所有内容"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Text: Moby Dick by Herman Melville 1851>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "text1  # 查看book模块的指定内容（第一篇是Moby Dick，即白鲸记）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Displaying 11 of 11 matches:\n",
      "ong the former , one was of a most monstrous size . ... This came towards us , \n",
      "ON OF THE PSALMS . \" Touching that monstrous bulk of the whale or ork we have r\n",
      "ll over with a heathenish array of monstrous clubs and spears . Some were thick\n",
      "d as you gazed , and wondered what monstrous cannibal and savage could ever hav\n",
      "that has survived the flood ; most monstrous and most mountainous ! That Himmal\n",
      "they might scout at Moby Dick as a monstrous fable , or still worse and more de\n",
      "th of Radney .'\" CHAPTER 55 Of the Monstrous Pictures of Whales . I shall ere l\n",
      "ing Scenes . In connexion with the monstrous pictures of whales , I am strongly\n",
      "ere to enter upon those still more monstrous stories of them which are to be fo\n",
      "ght have been rummaged out of this monstrous cabinet there is no telling . But \n",
      "of Whale - Bones ; for Whales of a monstrous size are oftentimes cast up dead u\n"
     ]
    }
   ],
   "source": [
    "text1.concordance(\"monstrous\")  # 查看monstrous在text1中的每一次出现及其上下文"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "it him hand them nantucket death that whales man head time ahab all\n",
      "ship sight whale which me men short\n"
     ]
    }
   ],
   "source": [
    "text1.similar(\"life\")  # 查找与指定词出现在相似上下文中的其他词"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mother time mind own heart family sister children dear spirits head\n",
      "her it that conduct love word and house marriage\n"
     ]
    }
   ],
   "source": [
    "text2.similar(\"life\")  # 与text1的结果对比可以看出，同一个词在不同文本中的用法是不一样的"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "my_i and_mrs my_that my_said my_you my_and\n"
     ]
    }
   ],
   "source": [
    "text2.common_contexts([\"love\", \"dear\"])  # 检索多个词共同的上下文"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZsAAAEWCAYAAACwtjr+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3de5hddX3v8fcHBjKUQAIGKyiZEaxyK0YzKBF0BkSPcBT1iCJe47EPxSpKPZQTTWtGjx6DWhUQxZSjIy3UC2CLlxYsOCIUkAkEAgoiEGoUFQpS7nL5nj/Wb7lXVvbes2dm/+ZiPq/n2c9e+3f9rrXX7O+sy+xRRGBmZpbTVjMdgJmZ/eFzsjEzs+ycbMzMLDsnGzMzy87JxszMsnOyMTOz7JxsbIsk6cWSbu7COBskHTaF/m+WdNFU4+iWbm2XScwbkp413fPa9HGysTlhqh/qdRHxw4h4TrfGa0bSiKTfSbo/PW6Q9HFJCypxnB0RL88Zx0Tk2i6S+lNCeSA9NkhaMYlxlku6rNvxWX5ONmZ5fSIidgB2Ad4BHAhcLmn7mQpI0tYzNTewMCLmA8cAH5L0ihmMxaaRk43NaZK2krRC0q2S/lPS1yXtnOq+IOncStuTJV2swpCkjZW63SWdL+muNM7nUvmeki5JZXdLOlvSwonGGRGPRMTVwJHAUygSzya/qae4PiPpN5Luk3S9pP1S3YikMyR9Lx0l/UBSXyX+vVLdPZJulvSGSt1I2hbflfQgcIikIyT9OI31C0knprb17bK3pFFJv5V0o6Qja+OeLuk7aZyrJO3Z4fa4ArgR2K9eJ2mBpLPSe3GHpL9O7/PewBnAsnR09NvO3wGbaU42Nte9F3gNMAjsBtwLnJ7q/hewf/pAfzHwTuDtUfuOpvSb/reBO4B+4OnAV8tq4ONp7L2B3YHhyQYbEfcD3wNe3KT65cBLgGcDC4Gjgf+s1L8Z+D/AImAdcHaKf/s05jnAUymOGj4vad9K3zcBHwN2AC4D/h/w5+moaz/gknowkrYBvgVclMY9HjhbUvU02zHAh4GdgJ+lOdpKSfUgYF/g2iZNTgMWAHtQvK9vA94RET8BjgOuiIj5ETHhpG8zx8nG5ro/B1ZGxMaIeJQiERwlqSciHgLeAnwa+Afg+IjY2GSMF1Akk7+KiAfTUchlABHxs4j4XkQ8GhF3pbEGpxjzL4Gdm5Q/RpEM9gIUET+JiDsr9d+JiEvTeq6k+A1/d+CVwIaI+HJEPB4R1wDnAUdV+v5zRFweEU9GxCNprn0k7RgR96Y+dQcC84HVEfG7iLiEIikfU2lzfkT8KCIep0h+S8ZZ97uBe4AzgRURcXG1MiX+o4EPRMT9EbEB+FvgreOMa7Ock43NdX3AN9Npnt8CPwGeAP4YICJ+BNxGcYTy9RZj7A7ckT4wNyHpqZK+mk41/RdF0lo0xZifTvGBu4n0Yf45iiOzX0taI2nHSpOfV9o+kMbYjWIbvLDcBmk7vBl4WrO+yeuAI4A70im5ZU3i3A34eUQ8WSm7I8Vf+lVl+SGK5NTOoojYKSL2johTm9UD26Z5Ws1pc5CTjc11PwcOj4iFlUdvRPwCQNK7gXkURxMntRljsaSeJnUfBwLYPyJ2pDhS0mSDlTQfOAz4YbP6iDg1IpZSnGJ6NvBXlerda+PsTLFePwd+UNsG8yPiXdWha/NcHRGvpjg99k80T8S/BHaXVP2cWAz8orO1nZS7KY66+ipl1Tn9NfVzlJONzSXbSOqtPHooLhh/rLxYLmkXSa9Oy88GPkqRIN4KnCSp2WmeHwF3AqslbZ/GPijV7QA8APxW0tPZ9MO/Y5LmSVpK8cF+L/DlJm0OkPTCdK3kQeARiqO00hGSDpa0LcW1m6si4ucUp7aeLemtkrZJjwPSBfVmsWyr4u97FkTEY8B/1eYpXZXiOCmNOQS8isb1rK6LiCcoEt/HJO2Q
3tf3UxxRAvwaeEbaBjaHONnYXPJd4OHKYxg4BbgAuEjS/cCVFKeUeig+oE6OiOsi4hbgg8DfS5pXHTR9wL0KeBbwH8BGiusGUFz8fj5wH/Ad4PwJxnxSiuse4CxgLfCiiHiwSdsdgb+jSEZ3UNwc8KlK/TnAqjTWUopTZeVNBy8H3khxNPIr4GSKI7pW3gpsSKcGj6NIyJuIiN9R3D13OMURx+eBt0XETZ2s+BQcT5HkbqO4meEc4Eup7hKKu9h+JenuzHFYF8n/PM1s9pM0AmyMiL+e6VjMJsNHNmZmlp2TjZmZZefTaGZmlp2PbMzMLLtmf1ewxVu0aFH09/fPdBhmZnPK2rVr746IXZrVOdk00d/fz9jY2EyHYWY2p0i6o1WdT6OZmVl2TjZmZpadk42ZmWXnZGNmZtk52ZiZWXZONmZmlp2TjZmZZedkY2Zm2TnZmJlZdk42ZmaWnZONmZll52RjZmbZOdmYmVl2TjZmZpadk42ZmWXnZGNmZtk52ZiZWXZONmZmlp2TjZmZZedkY2Zm2TnZmJlZdk42ZmaWnZONmZll52RjZmbZOdmYmVl2TjZmZpadk42ZmWXnZGNmZtk52ZiZWXZONmZmlp2TjZmZZTdjyUbiOIm3peXlErtV6s6U2GemYjMzs+6asWQTwRkRnJVeLodGsongzyL48YwE1gX9/TA8XDygsTw8DENDjeWyXX9/UV72LZfLvmWfat9mr6vz1svLsco2ddV4q/O3W5dmfZuNXRoagt5eWLiweJRtFy4s6so4q2MODTXWsxyjHku1T/25HluzsZqNUVdvU8bVap5q++r2L/vWx673qZe32u7V97OT7VFX3eb1Ns3igc3jb9W/WUzNbLVV87nb9RtvH66q/zyNN15vb2dztdJsvvo+V913mo3d7L0s+5T7XvVno7e30b58T1u9T/X9pfzMafe+doMiIu8M5UTFUcyJQADXA7cCDwAbgBHgF8DDwDLgX1Lb3YCPpCG2A7aN4JkSS4FPA/OBu4HlEdwpMQpcBRwCLATeGcEPJfYFvgxsS5FgXxfBLa1iHRgYiLGxsams6+9FbPq6nWrb8m3ptO9Exq6OX6rOKzWfv9UYrfrWNVuX8cZs1bbVtqrGMF7sreZtFv9Etkm7uOrzNRuvWXnVeHG02x7N3vf6nK22Rbtx2o1XX69WfZpt407naden3Tp10qaT7TbeWO32yVbrUV2XZvt9M+O93/WYmvWfCklrI2KgWd20HNmkD/uVwKERPBd4X1kXwbnAGPDmCJZE8HCl7oJUtgS4DviUxDbAacBRESwFvgR8rDJdTwQvAE4AVqWy44BT0jgDwMZc62pmZpvrmaZ5DgXOjeBugAjumchv7BInAQ9HcLrEfsB+wPfSGFsDd1aan5+e1wL9afkKYKXEM4Dzmx3VSDoWOBZg8eLFnQdnZmbjmq5rNqI4fTbxjuKlwOspjk7KsW4sj3gi+NMIXl7p8mh6foKUTCM4BziS4jTdhRKH1ueJiDURMRARA7vssstkQjUzsxamK9lcDLxB4ikAEjvX6u8Hdqh3kugDPg+8oXJ67WZgF4llqc026TRdSxJ7ALdFcCpwAbD/VFbGzMwmZlpOo0Vwo8THgB9IPAFcS3FjQGkEOEP6/Q0CpeXAU4BvplNmv4zgCImjgFMlFlCsw2eBG9uEcDTwFonHgF/RuOkgi74+WL688XrVqsby6Gjjro+RkaLdyEjjDpK+vk3vEFq1qtGn2rc+VnW8upGRxlit2lRjHBzsbF2alTWrr4575ZWNu31OOKF4XrAAliyBDRs2H2d0tHgu13NwsLFcn6t8XX+u1le3WXWs8eKvr2MZV7s4Sn19m5ZX+3Y6Z6v66vs50e1Rja3ZnK3Gq+4fzWJq97qZ8pR6p/tRq/pWfarxdrKt582b/Fyt5qtuu/JneSJjV/e5+vYfGoLVq2HFiuJ1+Z62ep/qMVU/f3KatrvR5pKp3o1mZrYlmvG70czMbMvm
ZGNmZtk52ZiZWXZONmZmlp2TjZmZZedkY2Zm2TnZmJlZdk42ZmaWnZONmZll52RjZmbZOdmYmVl2TjZmZpadk42ZmWXnZGNmZtk52ZiZWXZONmZmlp2TjZmZZedkY2Zm2TnZmJlZdk42ZmaWnZONmZll52RjZmbZOdmYmVl2TjZmZpadk42ZmWXnZGNmZtk52ZiZWXZONmZmlt2Ek43EsMSJOYKx6Tc83N0xJjre8HB3YrCZMTwMQ0PdfQ9bjTU8DL29jX1meBi2Sp9g1Riq+9TQUJ5YumVoaPMY+/s3Xcdmj3psE/kZnMrP61QoIibWQQwDD0TwqSwRjT9/TwSP55xjYGAgxsbGck4xa0gwwV2g7RgTHU8qnqcag82M8v2D7r2Hrfah6lxVEZvuR/XlqcTVjZ+P8caHTedotZ5V9XWbyM/gVH5exyNpbUQMNKvr6MhGYqXEzRL/Bjwnle0p8a8SayV+KLFXKh+R+ILE9yVukxiU+JLETyRGKmMeI7Fe4gaJkyvlr5C4RuI6iYtT2bDEGomLgLMk+tOc16THiyr9T0rjXiexOsV5TaX+TyTWTmgLmpnZlPSM10BiKfBG4Hmp/TXAWmANcFwEt0i8EPg8cGjqtlNaPhL4FnAQ8GfA1RJLgN8AJwNLgXuBiyReA1wO/B3wkghul9i5EspS4OAIHpb4I+BlETwi8SfAPwIDEocDrwFeGMFDEjtHcI/EfRJLIlgHvAMaSa+xnjoWOBZg8eLFHW08MzPrzLjJBngx8M0IHgKQuADoBV4EfKNyyDev0udbEYTEeuDXEaxPfW8E+oE+YDSCu1L52cBLgCeASyO4HSCCeypjXhDBw2l5G+BzKXE9ATw7lR8GfLmMtdL/TOAdEu8HjgZeUF/JiFhDkUAZGBjwSR0zsy7qJNkA1D98twJ+G8GSFu0fTc9PVpbL1z3Q8pqLmsxVerCy/JfAr4HnplgeGaf/ecAq4BJgbQT/2WIOMzPLoJNkcykwIrE6tX8V8EXgdonXR/ANCQH7R3Bdh/NeBZwisYjiNNoxwGnAFcDpEs8sT6PVjm5KC4CNETwp8XZg61R+EfAhiXOqp9HS6bYLgS8A7+wwxi3CqlXdHWOi43Vjfps5q1bB6OjU7/qqj9mqfPVqWLGiUfaRjxTPg4ONGKr9BwfzxNItzeLr64Ply8fv2+rnbryYp/LzOhUd3Y0msRJ4G3AHsBH4McXRwheAXSlOa301go+kmwC+HcG5Ev1peb80TrXuTcAHKI5GvhvBSanN4cD/pThi+U0EL6vfAZeu05wHPAR8Hzg+gvmpbkWK9Xdp3A+m8gNTn8URPNFufbeku9HMzLql3d1oE771ea5Kfxu0IIK/Ga+tk42Z2cS1SzadXrOZ0yS+CexJ4245MzObRltEsongtTMdg5nZlszfjWZmZtk52ZiZWXZONmZmlp2TjZmZZedkY2Zm2TnZmJlZdk42ZmaWnZONmZll52RjZmbZOdmYmVl2TjZmZpadk42ZmWXnZGNmZtk52ZiZWXZONmZmlp2TjZmZZedkY2Zm2TnZmJlZdk42ZmaWnZONmZll52RjZmbZOdmYmVl2TjZmZpadk42ZmWXnZGNmZtllSzYS75X4icTZXR53VGKgm2POpP5+GB5uvB4aaixXy7tpeHjzR72+W/OUz9X1ajVXtf1EYmi1HtOtnH/hws3L2rXvtHwi7fr7m7ebTDxV9f2z/t5Wx6huh2Zx1N+zyez7Q0ONRzlef3/jdW/vpvMMDRVxlX3KtmW7ehzlcnXfLMvr+2t1W1TnXLhw09jK8t7eRgxV9W3T6uezHKs6dk9P8ejt3XSdmm3nsk+zOXJQROQZWNwEHB7B7ZWynggen+K4o8CJEYxNMcSWBgYGYmws2/CbkIrn8m2Qmi/nmLOqOk+35i3Hqa9jszadtm81TynT7txxHGX8nbyHreo63f6djj3VeNqNW+pkrqn0bRdPp6r71nhtmu2T9edWY45X36y8vp1a
xVOPqRPt1qnVHJMlaW1END0YyHJkI3EGsAdwgcR9EmskLgLOkthF4jyJq9PjoNRne4kvpbJrJV6dyreT+KrE9RJfA7arzHOMxHqJGyROrpQ/IHGyxFqJf5N4QToiuk3iyBzrbGZmrWVJNhEcB/wSOAT4DLAUeHUEbwJOAT4TwQHA64AzU7eVwCWp/BDgkxLbA+8CHopgf+BjaSwkdgNOBg4FlgAHSLwmjbU9MBrBUuB+4KPAy4DXAh9pFrOkYyWNSRq76667urcxzMyMnmma54IIHk7LhwH7VA4Bd5TYAXg5cKTEiam8F1gMvAQ4FSCC6yWuT/UHUCSUuwDStaGXAP8E/A7419RuPfBoBI9JrAf6mwUYEWuANVCcRpvqCpuZWcN0JZsHK8tbAcsqyQcACQGvi+DmWjlAsw//dmcsH4v4fZ8ngUcBInhSmrZ1NjOzZCY+eC8C3gN8EkBiSQTrgAuB4yWOjyAknhfBtcClwJuB70vsB+yfxrkKOEViEXAvcAxw2jSvy5T19cHy5Y3Xg4ON5VWr8sw53rjdmrccZ9UqGB0df65q+8nMM9PKOBYs2LysXftOyyfSrq+vebvJxFPVbP+svrfVMarboZM4JrPvV/uUd1mNjDTuMlu9Glas2LT9unWwZEnxesOGou2VVzbaVccsl+v75uBgY75qrOW2qG+HE05oxFb+vM+bBwceWMRQVV/3Vq/rnx0jI7BxY7Hckz7Zy3WqjlFdp5GR5nPkkPNutA3AAEVieSCCT6XyRcDpwN4Uye7SCI6T2A74LPAiiqOWDRG8MpV/GdgHWAc8C3hvBGMSbwI+kNp/N4KT0hwPRDA/LQ/X5v99XSvTeTeamdkfinZ3o2VLNnOZk42Z2cRN+63PZmZmVU42ZmaWnZONmZll52RjZmbZOdmYmVl2TjZmZpadk42ZmWXnZGNmZtk52ZiZWXZONmZmlp2TjZmZZedkY2Zm2TnZmJlZdk42ZmaWnZONmZll52RjZmbZOdmYmVl2TjZmZpadk42ZmWXnZGNmZtk52ZiZWXZONmZmlp2TjZmZZedkY2Zm2TnZmJlZdk42ZmaWnZONmZllN+uSjcSwxIlt6pdIHFF5faTEiumJrjPDw+3ryvr6M8DQUKNNtS1Af39R32q+ZvM2m6u/v3mf/v5GDNV4xtNJm3o8zcrK+esWLtw83t7ezbdPs3jazdeuTbu68cYfb4yyf3Wdyve9nU7nrY5fjt3s9Xg6WbfJ9JnMuN2c/w9x7ur8newbM0ERMXOzNyExDDwQwada1C8HBiJ4T64YBgYGYmxsbNL9JWi1WaXiOaLRrtq+rK+q19XHbjZOq7rq+M3mblU22fXtpG27+Mv6erz19Wg1R7v5Ool/vP6T2T7jvSedjNNJzM3e607maRV3J7q9v0xUzrFn89zV+Se6P3c3Bq2NiIFmdbPiyEZipcTNEv8GPCeVjUoMpOVFEhsktgU+AhwtsU7iaInlEp9L7XaROE/i6vQ4KJUPpvbrJK6V2GGGVtXMbIvUM9MBSCwF3gg8jyKea4C1zdpG8DuJD1E5sklHOqVTgM9EcJnEYuBCYG/gRODdEVwuMR94ZPM4dCxwLMDixYu7tHZmZgazINkALwa+GcFDABIXTGGsw4B9KqcLdkxHMZcDn5Y4Gzg/go31jhGxBlgDxWm0KcRgZmY1syHZADT7cH+cxmm+3g7H2QpYFsHDtfLVEt8BjgCulDgsgpsmF6qZmU3UbEg2lwIjEqsp4nkV8EVgA7AU+BFwVKX9/dDymstFwHuAT0Jx51oE6yT2jGA9sF5iGbAX5Es2q1Z1VlcuV8sGB1vfMdLX1/yOrWbjtKpbtQpGRmD58s379PU1YqjGM55O2tTjaVZWzl+3YAGccMKmbefNgxUt7kGsxtNuvnZt2tWNN/54Y5T9q+/J6Oj4dwp1Om99/NHR5q/H08m6TabPZMbt5vx/iHNX5+9k35gJs+JuNImVwNuAO4CNwI+BbwNfBx4A
LgHeEkG/xM4U12K2AT4ObEe6hiOxCDid4jpND3BpBMdJnAYcAjyRxl4ewaOt4pnq3WhmZluidnejzYpkM9s42ZiZTdysv/XZzMz+sDnZmJlZdk42ZmaWnZONmZll52RjZmbZOdmYmVl2TjZmZpadk42ZmWXnZGNmZtk52ZiZWXZONmZmlp2TjZmZZedkY2Zm2TnZmJlZdk42ZmaWnZONmZll52RjZmbZOdmYmVl2TjZmZpadk42ZmWXnZGNmZtk52ZiZWXZONmZmlp2TjZmZZedkY2Zm2TnZmJlZdk42ZmaWXdZkI/FaiZDYK9P4AxKn5hjbzMy6J/eRzTHAZcAbuz2wRE8EYxG8t9tjd9PQEAwPF8vDw8Xr3Mr56svdHHeumu51mCvbbOHCibUfGoL+/uIxPNx49Pc36sp2sPl2GBra/GehWtZuu9XrFi4cP/5285extoqjHuvQEGy1VWPde3qK+cttULapzlvOUY5Xfa4/hoYaY5bPZdty+/b2FuW9vY04hoZAarTv7W3EXh+/HKcsK8fp6SnWLQdFRJ6BxXzgZuAQ4III9pIYAj4M/BpYApwPrAfeB2wHvCaCWyV2Ac4AFqfhTojgcolhYDegH7gbWAOcGMEr03ynAQNAAB+O4DyJLwAHpPHPjWDVeLEPDAzE2NhYF7ZC8eYDRGy6nJPUmKO63M1x56rpXoe5ss0mGme5L7dT7vPV53r/VmXt4ulkrIn0Ga+uuj7V+laarXf1eS6Y7D4raW1EDDSr65lKQON4DfCvEfxU4h6J56fy5wJ7A/cAtwFnRvACifcBxwMnAKcAn4ngMonFwIWpD8BS4OAIHk7Jq/Q3wH0R/CmAxE6pfGUE90hsDVwssX8E12dbazMz20zOZHMM8Nm0/NX0+jvA1RHcCSBxK3BRarOe4igI4DBgn8pvATtK7JCWL4jg4SbzHUbldF0E96bFN0gcS7GuuwL7wObJRtKxwLEAixcvrlebmdkUZEk2Ek8BDgX2kwhga4pTW98FHq00fbLy+slKPFsBy+pJJSWfB1tNm+aotn8mcCJwQAT3SowAvc06R8QaitNyDAwMzIETH2Zmc0euGwSOAs6KoC+C/gh2B24HDu6w/0XAe8oXEksm0WcnYEeK5HSfxB8Dh3c4v5mZdVGu02jHAKtrZecB7wJu7aD/e4HTJa6niPFS4Lhx+nw09bkBeILiBoHzJa4FbqS4PnR556vQHYODjTtTVq2C0dH8c65a1Xy5m+POVdO9DnNlmy1YMLH2g4OwYUOxvHx5o3xkpLirqawbHCye69uhLG9V1m671es6ib3d/H197eOo/8wODsKll8LixcW6f/SjMH8+nHDC5mOU85ZzlOMNDTWe60ZH4bLLijEfeKB4XrKkaFtu3yuvLO4ge+QReNrTijhGR+EHPyi2x5IlRZsDDyzGrM4zOlq8P+UdaQCrVxfjbNwITz65eUzdkO1utLmsm3ejmZltKdrdjeZvEDAzs+ycbMzMLDsnGzMzy87JxszMsnOyMTOz7JxszMwsOycbMzPLzsnGzMyyc7IxM7PsnGzMzCw7JxszM8vOycbMzLJzsjEzs+ycbMzMLDsnGzMzy87JxszMsnOyMTOz7JxszMwsOycbMzPLzsnGzMyyc7IxM7PsnGzMzCw7JxszM8vOycbMzLJzsjEzs+ycbMzMLDsnGzMzy87JxszMsnOyMTOz7JxszMwsOycbMzPLThEx0zHMOpLuAu6YZPdFwN1dDCcHxzh1sz0+cIzdMNvjg9kVY19E7NKswsmmyySNRcTATMfRjmOcutkeHzjGbpjt8cHciBF8Gs3MzKaBk42ZmWXnZNN9a2Y6gA44xqmb7fGBY+yG2R4fzI0Yfc3GzMzy85GNmZll52RjZmbZOdl0kaRXSLpZ0s8krcg81+6Svi/pJ5JulPS+VL6zpO9JuiU971Tp84EU282S/lulfKmk9anuVElK5fMkfS2VXyWpfxJxbi3pWknfnqXxLZR0
rqSb0rZcNgtj/Mv0Ht8g6R8l9c50jJK+JOk3km6olE1LTJLenua4RdLbJxDfJ9P7fL2kb0paOFPxtYqxUneipJC0aCZj7KqI8KMLD2Br4FZgD2Bb4Dpgn4zz7Qo8Py3vAPwU2Af4BLAila8ATk7L+6SY5gHPTLFunep+BCwDBPwLcHgq/wvgjLT8RuBrk4jz/cA5wLfT69kW31eAP0vL2wILZ1OMwNOB24Ht0uuvA8tnOkbgJcDzgRsqZdljAnYGbkvPO6XlnTqM7+VAT1o+eSbjaxVjKt8duJDiD8sXzWSMXf3Myj3BlvJIb/aFldcfAD4wjfP/M/Ay4GZg11S2K3Bzs3jSzrwstbmpUn4M8MVqm7TcQ/FXyppATM8ALgYOpZFsZlN8O1J8kKtWPptifDrw8/TB0AN8m+JDc8ZjBPrZ9MM8e0zVNqnui8AxncRXq3stcPZMxtcqRuBc4LnABhrJZsZi7NbDp9G6p/xQKG1MZdmlw+PnAVcBfxwRdwKk56eOE9/T03K9fJM+EfE4cB/wlAmE9lngJODJStlsim8P4C7gyypO9Z0pafvZFGNE/AL4FPAfwJ3AfRFx0WyKsWI6YurWz9n/pDgKmFXxSToS+EVEXFermjUxTpaTTfeoSVn2+8olzQfOA06IiP9q17RJWbQpb9enk7heCfwmItZ20r7NXFniS3ooTmN8ISKeBzxIcfpn1sSYrnu8muLUyW7A9pLeMpti7EA3Y5pyrJJWAo8DZ8+m+CT9EbAS+FCz6tkQ41Q42XTPRopzraVnAL/MOaGkbSgSzdkRcX4q/rWkXVP9rsBvxolvY1puFvfv+0jqARYA93QY3kHAkZI2AF8FDpX0D7MovrL/xoi4Kr0+lyL5zKYYDwNuj4i7IuIx4HzgRbMsxtJ0xDSln7N0MfyVwJsjnUOaRfHtSfFLxXXp5+YZwDWSnjaLYpy83OfptpQHxW/Jt1HsLOUNAvtmnE/AWcBna+WfZNOLtJ9Iy/uy6QXG22hcYLwaOJDGBcYjUvm72fQC49cnGesQjWs2syo+4IfAc9LycIpv1sQIvBC4EfijNPZXgONnQ4xsfs0me0wU165up7iwvVNa3rnD+F4B/BjYpdZuRuJrFmOtbgONazYzFlYVNmUAAAPGSURBVGO3Htk/hLekB3AExV1htwIrM891MMWh7/XAuvQ4guKc7MXALel550qflSm2m0l3rKTyAeCGVPc5Gt8s0Qt8A/gZxR0ve0wy1iEayWZWxQcsAcbSdvyn9MM322L8MHBTGv/v0wfOjMYI/CPFNaTHKH5Tfud0xURxveVn6fGOCcT3M4prFeXPyxkzFV+rGGv1G0jJZqZi7ObDX1djZmbZ+ZqNmZll52RjZmbZOdmYmVl2TjZmZpadk42ZmWXnZGM2SZI+I+mEyusLJZ1Zef23kt4/ybGHlL4pu0ndwZJ+lL7B+CZJx1bqdknf8HutpBdLer2Kb7P+/iRi+OBkYjdrxsnGbPL+neKv+ZG0FbCI4o/vSi8CLu9kIElbd9juaRTfon1cROxF8fdWfy7pv6cmL6X4YsbnRcQPKf6+5C8i4pBOxq9xsrGucbIxm7zLScmGIsncANwvaSdJ84C9gWslvTQdaaxP/8NkHoCkDZI+JOky4PUq/h/STen1/2gx57uBkYi4BiAi7qb4stMVkpZQfM3/EZLWSVpFkYzOSP/LZd90RLQu/U+XP0lxvKVS/kUV/4NoNbBdKju7eShmneuZ6QDM5qqI+KWkxyUtpkg6V1B8e+4yim/YvZ7iF7oR4KUR8VNJZwHvovhGbIBHIuJgSb0Uf3l/KMVfdX+txbT7UnxlTdUYxVcjrZP0IWAgIt4DIOkQ4MSIGJN0GnBKRJwtaVtga0l7A0cDB0XEY5I+T/G9YSskvScilkx1O5mBj2zMpqo8uimTzRWV1/8OPIfiizR/mtp/heKfZpXKpLJXandLFF/r8Q8t5hPNv6G3k68CuQL4oKT/DfRFxMMU
p92WAldLWpde79HBWGYT4mRjNjXldZs/pTiNdiXFkU15vabZ17lXPVhZ7iRh3EjxXVhVSym+YLKtiDgHOBJ4GLhQ0qEpvq9ExJL0eE5EDHcQh9mEONmYTc3lFF9Zf09EPBER91D8a+llFEcSNwH9kp6V2r8V+EGTcW4Cnilpz/T6mBbznQ4sT9dnkPQUin9x/InxApW0B3BbRJwKXADsT/GFmUdJempqs7OkvtTlsfRvLMymzMnGbGrWU9yFdmWt7L6IuDsiHgHeAXxD0nqK/1p6Rn2Q1O5Y4DvpBoE7mk0WxX/AfAvwd5Juojiy+lJEfKuDWI8Gbkiny/YCzoqIHwN/DVwk6XrgexT/ahhgDXC9bxCwbvC3PpuZWXY+sjEzs+ycbMzMLDsnGzMzy87JxszMsnOyMTOz7JxszMwsOycbMzPL7v8DGpHZmvSwX94AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 绘制词汇分布图：显示各指定单词在文本中出现的位置（从文本开头算起的词偏移量）\n",
    "text4.dispersion_plot([\"citizens\", \"democracy\", \"freedom\", \"duties\", \"America\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Building ngram index...\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "laid by her , and said unto Cain , Where art thou , and said , Go to ,\n",
      "I will not do it for ten ' s sons ; we dreamed each man according to\n",
      "their generatio the firstborn said unto Laban , Because I said , Nay ,\n",
      "but Sarah shall her name be . , duke Elah , duke Shobal , and Akan .\n",
      "and looked upon my affliction . Bashemath Ishmael ' s blood , but Isra\n",
      "for as a prince hast thou found of all the cattle in the valley , and\n",
      "the wo The\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "\"laid by her , and said unto Cain , Where art thou , and said , Go to ,\\nI will not do it for ten ' s sons ; we dreamed each man according to\\ntheir generatio the firstborn said unto Laban , Because I said , Nay ,\\nbut Sarah shall her name be . , duke Elah , duke Shobal , and Akan .\\nand looked upon my affliction . Bashemath Ishmael ' s blood , but Isra\\nfor as a prince hast thou found of all the cattle in the valley , and\\nthe wo The\""
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "text3.generate()  # 随机生成指定文本风格的文字（不一定有意义）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['!',\n",
       " \"'\",\n",
       " '(',\n",
       " ')',\n",
       " ',',\n",
       " ',)',\n",
       " '.',\n",
       " '.)',\n",
       " ':',\n",
       " ';',\n",
       " ';)',\n",
       " '?',\n",
       " '?)',\n",
       " 'A',\n",
       " 'Abel',\n",
       " 'Abelmizraim',\n",
       " 'Abidah',\n",
       " 'Abide',\n",
       " 'Abimael',\n",
       " 'Abimelech']"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sorted(set(text3))[:20]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['In',\n",
       " 'the',\n",
       " 'beginning',\n",
       " 'God',\n",
       " 'created',\n",
       " 'the',\n",
       " 'heaven',\n",
       " 'and',\n",
       " 'the',\n",
       " 'earth']"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "text3[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "16.050197203298673"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(text3) / len(set(text3))  # 每个词平均使用的次数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "text3.count(\"smote\")  # 计算特定词出现的次数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "FreqDist({',': 18713, 'the': 13721, '.': 6862, 'of': 6536, 'and': 6024, 'a': 4569, 'to': 4542, ';': 4072, 'in': 3916, 'that': 2982, ...})"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fdist1 = FreqDist(text1)  # 获取指定文本的词频信息（显示时按词频降序排列）\n",
    "fdist1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3916"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fdist1[\"in\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[',',\n",
       " 'the',\n",
       " '.',\n",
       " 'of',\n",
       " 'and',\n",
       " 'a',\n",
       " 'to',\n",
       " ';',\n",
       " 'in',\n",
       " 'that',\n",
       " \"'\",\n",
       " '-',\n",
       " 'his',\n",
       " 'it',\n",
       " 'I',\n",
       " 's',\n",
       " 'is',\n",
       " 'he',\n",
       " 'with',\n",
       " 'was']"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 按词频降序列出出现次数最多的前20个词\n",
    "sorted(fdist1, key = lambda i : fdist1[i], reverse=True)[:20]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Herman',\n",
       " 'Melville',\n",
       " ']',\n",
       " 'ETYMOLOGY',\n",
       " 'Late',\n",
       " 'Consumptive',\n",
       " 'School',\n",
       " 'threadbare',\n",
       " 'lexicons',\n",
       " 'mockingly',\n",
       " 'flags',\n",
       " 'mortality',\n",
       " 'signification',\n",
       " 'HACKLUYT',\n",
       " 'Sw',\n",
       " 'HVAL',\n",
       " 'roundness',\n",
       " 'Dut',\n",
       " 'Ger',\n",
       " 'WALLEN']"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fdist1.hapaxes()[:20]  # 检索只出现一次的词"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZgAAAEaCAYAAAAsQ0GGAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOydd5iU1fX4P2eXpfe+gNJBAUXdBWzY0GDHRI0aFWNMTIxfS0xR0jRGE01M8rNEowZ7RWKiqIgKKBbaLr0qHaSzy1K2sOX8/rh3dmfHmd3ZZcqW83meeead877vPefOvPOee+85976iqhiGYRhGrElJtgGGYRhGw8QcjGEYhhEXzMEYhmEYccEcjGEYhhEXzMEYhmEYccEcjGEYhhEXmiTbgLpC586dtU+fPrU+v6CggBYtWsRNnggdpjvxuhOhw3QnXncidCRCdzRkZ2fvVtUuYXeqqr1UycjI0MMhKysrrvJE6DDdDVOH6W6YOhKhOxqALI1wX7UhMsMwDCMumIMxDMMw4oI5GMMwDCMumIMxDMMw4oI5GMMwDCMumIMxDMMw4oI5GMMwjEaIqrI9r5CPVuzgvTUH46IjbhMtReQZ4EJgp6oO87K/AhcBh4C1wPWqutfvmwDcAJQCt6rqNC/PAJ4DWgDvAbepqopIM+AFIAPYA1yhqhv8OdcBv/Wm3Keqz8ernoZhGHWdsjJlU04+n28u4KNdq1j2dR4rtu5jz8FDgOtp/OLbpbRomhpTvfGcyf8c8BjOCQT4EJigqiUi8iAwAbhTRIYAVwJDgR7ARyIySFVLgSeAG4E5OAdzLjAV54xyVXWAiFwJPAhcISIdgbuBTECBbBF5W1Vz41hXwzCMOkFJaRmb8orZkL2FZVvzWL51Hyu27uNAUYk/Iq/82HYt0hjaoy2dUwsoLK5HDkZVZ4lInxDZB0Ef5wCX+e1xwGuqWgSsF5E1wEgR2QC0VdXZACLyAnAJzsGMA+7x508GHhMRAcYCH6pqjj/nQ5xTejXGVTQMw0gqh0rK+HLHfpZ9nceyrXks+3ofK7fto6ikDDewU0G3ts3o1QpOOfoIhvRox7CebenZvgUiQnZ2Nh1aNY25fclci+wHwOt+uyfO4QTY4mXFfjtUHjhnM4DvEeUBnYLlYc4xDMOolxQWl7J6+36Wfp3H8q15zP1yN5vffJ/i0m8+9r5bq1Qy+nVhaI92DO3RlqE92tGlTTOys7PJyBicMJvFLSUTp8JdD+adQAwmSP4b3BDWd3w85Z/AbFV9ye+fiBsO2wT8WVXP9vLRwK9U9SIRWQ6MVdUtft9aYCTOcTVT1fu8/HdAvqr+LYx9N+KG30hPT8+YMmVKreuan59Py5Yt4yZPhA7TnXjdidBhuhOv+3DLKipVNu4tZm1uMV/uKmTTfmXzvhJCfYkA6W1S6dc+jX4d0ujXoQl926eRUlIYs3pUR2ZmZraqZobdGWmRsli8gD7AshDZdcBsoGWQbAIuNhP4PA04CUgHVgXJrwKeDD7GbzcBdvvvu/wYv+9J4KrqbLXFLk13Q9Vhuuu2joJDJbpgY47+8bVP9BeTFunYf3yi/Sa8q73vfKfSq+9d7+jZf/tYf/baQv33p+v0+alf6P7C4rjXozqoYrHLhA6Rici5wJ3A6aqaH7TrbeAVEfk7Lsg/EJinqqUisl9ETgTmAuOBR4POCTiry4AZqqoiMg34k4h08Md9C+fADMMwkkppmbIpr5h1WZtZvGUvizfnsWr7vqBhrv0ApAgM7taGYT3b0b4sj/NPGsbR6W1p2bTilp2dnUPrZnX7iSvxTFN+FTgD6CwiW3CZXROAZsCHLh7PHFX9iaouF5FJwAqgBLhZXQYZwE1UpClP9S+AicCLPiEgB5eFhqrmiMgfgfn+uHvVB/wNwzASyc79hSzatJeFm/eyaNNeln6d57O5KgLwIjCoW2t6NC/hzOH9Gdaz
HUPS25ZndGVnZ5PRu2OSanB4xDOL7Kow4olVHH8/cH8YeRYwLIy8ELg8QlnPAM9EbaxhGMZhUlhcyvKteSzctJeFm/Yyd+1Odr+x/RvHdW6Zwsj+XRneqz3H9mrPMb3a0bpZEx+A75N4w+NI3e5fGYZh1EFU3cTFWRsLeHvLMhZu3suKrfsoKaschW/drAnH9mrHcUe0d68j27P5y+VkZGQkyfLEYg7GMAyjGgpKyvhi7W7fO8ll4aa95bPgAxMXUwSO6t6G4490zqTp/q1cfMYoUlOkUlmbaTyYgzEMwwhhx75C5m/IIWtDLvM35LBy6z7K2FnpmE6tmtK3rXDmsX04/kg33BUcdM/O3vUN59LYMAdjGEajRlVZu+sA8zfk8n72XtZNn8HmnIJKx6QKHNOjHScc2Z7jj+zACUd24IiOLViwYAEZGQOSZHndxxyMYRiNiuLSMr7cc4gFs9Yxb0MOWRtyyM0vrnRM62ZNOKF3B0b07kBmn47o7vWcPCr8XEIjMuZgDMNo0JSWKSu37ePzNbv5fO0e5q/PoaC4FDe7wdG1TTNG9O1I95QDfOe04RzVvW2l4a3svRsSb3gDwByMYRgNClVlw558pq45yNMrspm9bg95BZV7KD3apDJ6cA9G9O3IyD4dOaJjxaKPQ3u0S5LlDQ9zMIZh1HvyCoqZvXY3n3y5m0+/2sWW3EAMxc2M79WhBaf078zJAzpxUv9OPlX42OQZ3EgwB2MYRr2jtExZsmUvn3y5i6kL9/DV5A8InoLSoWUaQzqlctGIgZwyoDNHdKy8kGNjShVOJuZgDMOoF+QUlDIpazOzvtzFZ2t2szcoMN8kRRjRpwOnDerC6IGdGdqjHYsWLiAj48gkWmyYgzEMo05yqKSM7I25fPLlLj5evZNV2/cDu8r3H9mxJacP6kLP1Dyu+daoOr/wY2PEfhHDMOoMW/cW8MG6fJ5cnsUXa/cEPeYXmqUKpwzswumD3KtP51aAWwzSnEvdxH4VwzCShqqyavt+Pli+gw9XbmfZ1/v8Hvc+sGtrzhjchdMHdaVJ7gZOHGlzUeoT5mAMw0goJaVlzN+Qy8uL9nHbRzODMr6gZdNUhnVuwiWjBnHaoM706lARnM/O3pgMc43DwByMYRhxp7C4lM/X7Gba8u18uGJHpZnznVs35eyju3HOkG6cMqAzy5cssuB8A8EcjGEYceFAUQmfby7gmdUL+HjVTg4eKi3f16dTS4Z3FsafdSzHHdGh0S8K2VAxB2MYRszIyy/mo5U7mLpsG7O+2s2hkjICy9kP7dGWsUO7M3ZodwZ1a+0WiqynT2o0osMcjGEYh8WeA0V8uC6fhxfP44s1u8sfuiUCR3dO49JRAxg7tPs3JjsaDR9zMIZh1Jid+wqZtnw77y3dztz1e8pn0aemCKcM6MS5w9IZO6Qbm79aTkZGv+QaayQNczCGYUTFtrwC3vnqIA/On838jTmodyppqcLwLmlcecpgzhnSnY6tmpafY0uyNG7MwRiGEZGv9xYwdek23l26jYWb9pbLmzZJ4bSBXTj/mO6MOboba1Ysscwv4xuYgzEMoxKbc/J5a/VB7p3zOYs3VziV5mkpDO+axtWnDeGso7ra7HmjWuwKMQyDbXkFvLtkG+8s2caiIKfSIi2Vs47qyvnHpHPmUV1YuXQxGcN7JNFSoz5hDsYwGik79xfy3pqDPDD/C+ZvyC2Xt2yayvHd0rjmtCGcMbgrLZqmJtFKoz5jDsYwGhF78w/x/rLtTFmyldlrK7K/mjVJ4czBXbloeA/OOqorK5YuIuOY9OQaa9R7zMEYRgMnv7iM/y7cwpTF25j15a7yeSppqcLxXZty7WlHc/aQbhZTMWKOXVGG0QApLC7l49W7eHvx13y0fCeHynYCkCIwemBnLjq2B2OHdmfNyiVkHN8zydYaDRVzMIbRQChV5fM1u3lr0ddMXbad/YUVz1IZ2acjFw1P57xj0unculkSrTQaE+ZgDKOe
s3r7ft7I2szkrF3sLdxRLh/aoy3jjutBb9nD2NEjk2ih0VgxB2MY9ZC8gmLeXryVyVmbWbwlr1zeu1NLxh3Xk4uH92BA19YAZGfvjVSMYcSVuDkYEXkGuBDYqarDvKwj8DrQB9gAfFdVc/2+CcANQClwq6pO8/IM4DmgBfAecJuqqog0A14AMoA9wBWqusGfcx3wW2/Kfar6fLzqaRiJoqxM+WLtHv41Zy/z/vuRX6kY2jRvwsXDezCs1QGuPOdERGzpe6NuEM8ezHPAYzgnEOAuYLqqPiAid/nPd4rIEOBKYCjQA/hIRAapainwBHAjMAfnYM4FpuKcUa6qDhCRK4EHgSu8E7sbyAQUyBaRtwOOzDDqG9vzCnkjazOvZ20uf/qjCJw6oDOXZ/Zi7NDuNE9LJTs725yLUaeIm4NR1Vki0idEPA44w28/D3wM3Onlr6lqEbBeRNYAI0VkA9BWVWcDiMgLwCU4BzMOuMeXNRl4TNy/ayzwoarm+HM+xDmlV2NdR8OIFyWlZczfWsjjS+czc/XO8vkqPdu34NQeqdxy0YhKjxM2jLqIaGBJ1HgU7hzMO0FDZHtVtX3Q/lxV7SAijwFzVPUlL5+IcyIbgAdU9WwvHw3cqaoXisgy4FxV3eL3rQVGAd8HmqvqfV7+O6BAVR8KY9+NuN4R6enpGVOmTKl1XfPz82nZ8pt/+FjJE6HDdCded+i+7QdKmL6+gJkbCsgtdENgTQRG9GzO2X1bcGy3phQWFNSb+jVW3YnQkQjd0ZCZmZmtqplhd6pq3F64WMuyoM97Q/bn+vd/AtcEyScClwIjgI+C5KOBKX57OdAraN9aoBPwS+C3QfLfAT+vztaMjAw9HLKysuIqT4QO050cHbPnzdcpi7/Wq5+eo73vfKf8ddJ9U/WpT9bqrv2FcdPdkL/bZP+uDaF+0QBkaYT7aqKzyHaISLqqbhORdGCnl28Bjgg6rhew1ct7hZEHn7NFRJoA7YAcLz8j5JyPY1sNwzh8Nu3J56W5G3ltzk72HXLpxc2apHDBMelcNepIZPc6MjPtYV1G/SXRDuZt4DrgAf/+VpD8FRH5Oy7IPxCYp6qlIrJfRE4E5gLjgUdDypoNXAbMUFUVkWnAn0Skgz/uW8CE+FfNMKpHVZmzLodnPl/PRyt3lD+066jubbhq5JFcclxP2rVMAyB7z/okWmoYh08805RfxfUkOovIFlxm1wPAJBG5AdgEXA6gqstFZBKwAigBblaXQQZwExVpylP9C9ww2os+ISAHl4WGquaIyB+B+f64e9UH/A0jWRwqVd7I2swzn29g5bZ9ADRNTeHC4emMaF9g6cVGgySeWWRXRdg1JsLx9wP3h5FnAcPCyAvxDirMvmeAZ6I21jDixJ4DRbw4ZyPPfrqLvCI3DNa5dVOuHtWbq088kq5tmlt6sdFgsZn8hhEH1u46wMTP1vOf7C0U+QmRQ9Lb8oNT+3LR8HSaNbFnrBgNH3MwhhEjVJUVuw7xxPNZTF9VEV8Zc1RXTutWzPhzT7KeitGoMAdjGIdJaZkybfl2npy1rvwZ9k2bpPCd43vyw9F9GdC1jQ2DGY0SczCGUUsKDpXyRvZm/v3pejbl5APQpqlw/an9ufakPnRpY8viG40bczCGUUP2HCjiteX7+ejd6eTmFwNwZMeW/Gh0X/qn7ObkUYOTbKFh1A2qdTAi0gq31EqZiAwCjgKmqmpx3K0zjDrErv1FPDVrLS/O2UhhsQvcDz+iPT8+rR9jh3YnNUXIzt6TZCsNo+4QTQ9mFjDaT1ycDmQBVwBXx9Mww6grhHMsGenN+NVFxzOyb0eLrRhGBKJxMKKq+X5y5KOq+hcRWRhvwwwj2ew+UMTzi/fxwf9mlDuWs4/uxu1nD6Ro+xoy+nVKsoWGUbeJysGIyEm4HssNNTjPMOolew4U8eSsdbw4eyMFxW5BibOP7sZtYwZyTK92AGRvT6aFhlE/iMZR3IZby+u/fkmXfsDM+Jpl
GIkn5+Ahnpq1jhdmbyD/kHMsGenNuOfSEeWOxTCM6InGwXRT1YsDH1R1nYh8GkebDCOh7C8q4y/vr+L5LzZw0DuWs47qyu1nD6R4x1pzLoZRS6JxMBOAN6KQGUa9Iq+gmImfruPpWbsoKHFPjjhjcBduP3sQxx3hnouXvSOZFhpG/SaigxGR84DzgZ4i8kjQrra4FY8No15yoKiEZz9bz9OfrmNfobuUTxvUhdvPHsgJR3ao5mzDMKKlqh7MVlxK8sVAdpB8P/CzeBplGPGgsKSMf32ylic/WVs+QfLk/p244Ejl6rEjk2ydYTQ8IjoYVV0MLBaRV2xSpVGfKSwu5eW5m3jkw93kFbmhsMzeHbjjW4M4uX9nsrOzqynBMIzaEE0MZqSI3AP09scLoKpqz3I16jSHSsp4PWsz/5yxhu37CgEY3qsdd3xrMKcN7GwTJA0jzkTjYCbihsSygdJqjjWMpFNapkyav5lHZnzFltwCAI5Ob8u4fin8+MKTzbEYRoKIxsHkqerU6g8zjORSVqZMWbKVB6btZtsBl/41sGtrfnbOIM4d2p2FCxeYczGMBBKNg5kpIn8F3gSKAkJVXRA3qwyjBqgqH67Ywd8++JLVO/YD0KdTS24/exAXDe9Baoo5FcNIBtE4mFH+PTNIpsBZsTfHMGrG52t285dpq8sf9NWzfQvGDUjjjm+fQpPUlCRbZxiNm2odjKqemQhDDKMmLNyUyz2f5LB0p1sUrHPrpvzfmQO4atSRLFu8yJyLYdQBonkezO/DyVX13tibYxhVs2bnAf46bRXTlrsYS9vmTfjx6f35/sl9aNXM1mA1jLpENP/Ig0HbzYELgZXxMccwwrM9r5CHp3/J6/M3U6bQPC2F8/q34J7vnkK7lmnJNs8wjDBEM0T2t+DPIvIQ8HbcLDKMIPIKinlp6X7e++9MikrKSE0RvjfyCG4bM5AtXy0352IYdZjajCm0BGySpRFXVJU3srfwp/dWstcv63L+Md35+bcG079LawC2JNNAwzCqJZoYzFJc1hhAKtAFsPiLETe25OYz4c2lfPrVbgCGdmnK/d8dUb7CsWEY9YNoejAXBm2XADtU1VZTNmJOWZny8tyNPDB1FQcPldK+ZRr3XDSUXqXbzLkYRj0kmhjMRhEZDoz2olnAkrhaZTQ6th0o4S9Pz2Hu+hzADYf94eJhdGnTjGx7PrFh1EuiGSK7DfgRbiY/wMsi8pSqPhpXy4xGQVmZ8twXG3jgg90cKnXzWe4dN4zzj0lPtmmGYRwm0cxGuwEYpaq/V9XfAyfiHE6tEZGfichyEVkmIq+KSHMR6SgiH4rIV/69Q9DxE0RkjYisFpGxQfIMEVnq9z0ifqEpEWkmIq97+VwR6XM49hrxYXNOPt/79xzufWcFh0rh28f35MOfnW7OxTAaCNE4GKHyKsqlXlYrRKQncCuQqarDcIkDVwJ3AdNVdSAw3X9GRIb4/UOBc4HHRSTVF/cEcCMw0L/O9fIbgFxVHQD8A3iwtvYasUdVeX3+Js57+FPmrMuhc+um3Hlye/5xxXF0aNU02eYZhhEjonEwzwJzReQe/1yYObgl/A+HJkALEWmCS3veCowDnvf7nwcu8dvjgNdUtUhV1wNrcM+oSQfaqupsVVXghZBzAmVNBsYEejdGcsktKOWG57O48z9LOVBUwvnHdOeDn53OyJ7Nk22aYRgxJpog/99F5GPgVFzP5XpVXVhbhar6tZ+suQkoAD5Q1Q9EpJuqbvPHbBORrv6UnjinFmCLlxVTeSpEQB44Z7Mvq0RE8oBOwO7a2m0cPlOXbuOXH+zmwCGlbfMm/PGSYVw8vAciwvpkG2cYRswR1/gPs0NkBNA59FkwInIx8LWq1uo5sz628h/gCmAv8Aaul/GYqrYPOi5XVTuIyD+B2ar6kpdPBN7DOag/q+rZXj4a+JWqXiQiy4GxqrrF71sLjFTVPSG23IgbYiM9PT1jypQptakSAPn5+bRs2TJu
8kToiJfu4jLlhcX7eW9NPgDHd2/KTZnt6NQitdpyElG/ZOpOhA7TnXjdidCRCN3RkJmZma2qmWF3qmrYF/Ax0CeMfAAwI9J51b2Ay4GJQZ/HA48Dq4F0L0sHVvvtCcCEoOOnASf5Y1YFya8Cngw+xm83wfVcpCq7MjIy9HDIysqKqzwROuKhe3POQb34sc+0953v6IBfv6v3vvqJlpWVJUT34cobig7T3TB1JEJ3NABZGuG+WlUMppOqbgjjkNbghptqyybgRBFp6eMiY3CLZ74NXOePuQ54y2+/DVzpM8P64oL589QNp+0XkRN9OeNDzgmUdRnOIYbvqhlxY/rKHVzwyGcs3ryXnu1bMPknJ3P+wFb2VEnDaCRUFYNpUcW+VrVVqKpzRWQysAC3MsBC4CmgNTBJRG7AOaHL/fHLRWQSsMIff7OqBrLabgKe87ZO9S9wSQgvisgaIAeXhWYkiJLSMl5csp//rc4CYMxRXfnbd4fTvmVTsncm2TjDMBJGVQ7mIxG5H/htcOtfRP4AzDgcpap6N3B3iLgI15sJd/z9wP1h5FnAsDDyQryDMhLLzv2F/N8rC5m3/iCpKcKvxg7mR6P7kWKPLTaMRkdVDubnwL+BNSKyyMuGA1nAD+NtmFH/yN6Yy09fzmbHviI6NE/hyetGMbJvx2SbZRhGkojoYFT1IHCViPTDTXIEWK6q6xJimVFvUFVemruJe6csp7hUGdm3IzcOSzXnYhiNnGjmwawDzKkYYSkqVX45eQmTs92UpB+c0pcJ5x/FkkW1niplGEYDwR5ibtSazTn5/HbGHtbtLaF5WgoPXnos447rWf2JhmE0CszBGLVi9to9/PTlbHLzSziyY0uevDaDo9PbJtsswzDqEFE5GBE5FRioqs+KSBegtbp1wYxGyCtzN/H7t5ZRUqac0L0Zz954Ku1apiXbLMMw6hjRPA/mbiATGIxb+DINeAk4Jb6mGXWNktIy7nt3Jc99sQGAH5/ejzFdDppzMQwjLNGspvxt4GLgIICqbgXaxNMoo+5x4FAZ3392Ps99sYGmqSk8dPlwJpx3NKk2K98wjAhEM0R2SFVVRBRARGo9i9+on6zbdYAJ0/ew9UApnVs35clrM8jobSnIhmFUTTQOZpKIPAm0F5EfAT8Ano6vWUZdYfbaPfz4xSz2FZZydHpb/n1dJj3bV7WKkGEYhiOaeTAPicg5wD5cHOb3qvph3C0zks605du55dWFHCopY2SPZjz745No1cwSDw3DiI5ogvw/A94wp9K4mJS1mbv+s4QyhWtP7M1FvQrNuRiGUSOiCfK3BaaJyKcicrOIdIu3UUZyeWrWWn412TmXW8cM5N5xQy2YbxhGjanWwajqH1R1KHAz0AP4REQ+irtlRsJRVR6Yuoo/vbcKgHsuGsId5wyy57cYhlErajLmsRPYDuwBusbHHCNZlJSW8UT2Pqav30GTFOGhy4dzyfG27IthGLWn2h6MiNwkIh8D04HOwI9U9dh4G2YkDlXlrjeXMn19Ac3TUnh6fKY5F8MwDptoejC9gdtVdVG1Rxr1kqc/Xcfk7C00SxVeumEUmX1sjothGIdPRAcjIm1VdR/wF/+50l1HVXPibJuRAGau2smfp7qYyy0j25lzMQwjZlTVg3kFuBDIBhQIjvQq0C+OdhkJ4Ksd+7nl1YWowh3nDOKk9vuSbZJhGA2Iqp5oeaF/75s4c4xEkXvwEDc8n8WBohIuODadW84awIIFC5JtlmEYDYhogvzTo5EZ9YeSMuWnLy9gU04+w3q25aHLhlsqsmEYMaeqGExzoCXQWUQ6UDFE1hY3H8aopzyzaB+z1xXQpU0znh6fSYumqck2yTCMBkhVMZgfA7fjnEk2FQ5mH/DPONtlxIkX52xk2toCmjZJ4alrM0hvZwtXGoYRH6qKwTwMPCwit6jqowm0yYgT89bn8Ie3lwPw4KXHcPyRHZJskWEYDZloVlN+VESGAUOA5kHyF+JpmBFb
tucV8tOXsykpUy4e1JJvH98r2SYZhtHAifaRyWfgHMx7wHnAZ4A5mHpCUUkpP3kpm90HDnFy/05cc4ytimwYRvyJZjXly4AxwHZVvR4YDjSLq1VGTLnn7eUs2ryXnu1b8Nj3TiA1xTLGDMOIP9E4mAJVLQNKRKQtbtFLm2RZT3hl7iZenbeZZk1SePLaDDq2appskwzDaCREM1aSJSLtcY9JzgYOAPPiapURE1bvOcTdnywD4E/fPoZhPdsl2SLDMBoT0QT5f+o3/yUi7wNtVXVJfM0yDped+wt56Iu9FJcq3z+5D5dmWFDfMIzEEnGITEROCH0BHYEmfrvWiEh7EZksIqtEZKWInCQiHUXkQxH5yr93CDp+goisEZHVIjI2SJ4hIkv9vkfET0cXkWYi8rqXzxWRPodjb32jpLSMm19eQE5hGSP7dOQ3FxydbJMMw2iEVNWD+VsV+xQ46zD0Pgy8r6qXiUhT3IoBvwamq+oDInIXcBdwp4gMAa4EhuImfX4kIoNUtRR4ArgRmIPLcDsXmArcAOSq6gARuRJ4ELjiMOytVzwyYw3zN+TSsXkKj119PGmp0YTaDMMwYktVEy3PjIdCnyhwGvB9r+cQcEhExuHSoQGeBz4G7gTGAa+pahGwXkTWACNFZANuuG62L/cF4BKcgxkH3OPLmgw8JiKiqhqPOtUlsjbk8NiMrxCBW0e1o2ub5tWfZBiGEQekunuuiIwPJ6/tREsROQ54CliBS3nOBm4DvlbV9kHH5apqBxF5DJijqi95+UScE9kAPKCqZ3v5aOBOVb1QRJYB56rqFr9vLTBKVXeH2HIjrgdEenp6xpQpU2pTJQDy8/Np2bJl3OTRnHOwuIyff7CbXfllXDK4FZcOSE2Y7njJG7ruROgw3YnXnQgdidAdDZmZmdmqmhl2p6pW+QIeDXo9DawDJld3XhXlZQIluBs+uOGyPwJ7Q47L9e//BK4Jkk8ELgVGAB8FyUcDU/z2cqBX0L61QKeq7MrIyNDDISsrK67yaM657dUF2vvOd/TCRz7VouLShOqOl7yh606EDtPdMHUkQnc0AFka4b4aTRbZLcGfRaQd8GIUji0SW4AtqjrXf56Mi7fsEJF0Vd0mIum4+TaB448IOjED88sAACAASURBVL8XsNXLe4WRB5+zRUSaAO2ABv0Ezv8t/Jr/LdpKi7RU/t+Vx9G0icVdDMNILrW5C+UDA2urUFW3A5tFZLAXjcENl70NXOdl1wFv+e23gSt9Zlhfr3ueqm4D9ovIiT57bHzIOYGyLgNmeE/bINlxsITf/c/Nd7n7oiH079I6yRYZhmFEtxbZFFzWGDiHNASYdJh6bwFe9hlk64DrfdmTROQGYBNwOYCqLheRSTgnVALcrC6DDOAm4DmgBS4uM9XLJwIv+oSAHFwWWoOkpLSMR+bmsb+ohLFDu3HFiCOqP8kwDCMBRDOT/6Gg7RJgo/rgeW1R1UW4WEwoYyIcfz9wfxh5FjAsjLwQ76AaOv+cuZZVe4rp1rYZD3znWHsypWEYdYZoYjCfQHl6cRO/3VFVG3RMoz6wYFMuj8z4CgH+/t3j6GDrjBmGUYeIZojsRlyWVwFQhnuypWILXiaVA0Ul3P7aIkr9811OGdA52SYZhmFUIpohsl8CQzVkDomRXO55ezmbcvIZkt6W7w2zxx4bhlH3iCaLbC0uc8yoI7y3dBuTs7fQrEkKj1x1HGmpFncxDKPuEU0PZgLwhYjMBYoCQlW9NW5WGRHZllfAhDeXAvCbC45mQNc2ZG9OslGGYRhhiMbBPAnMAJbiYjBGkihT5eeTFpNXUMyZg7tw7Ym9k22SYRhGRKJxMCWqekfcLTGqZcqX+Xyxdj+dWjXlL5cNt5RkwzDqNNHEYGaKyI0iku6f2dJRRDrG3TKjEsu35vHK0v0A/OWyY+nSplmSLTIMw6iaaHow3/PvE4JklqacQAoOlXLba4soUbj2xN6MObpbsk0yDMOolmgmWvZN
hCFGZB6b+RVrdh6gZ5tUfn2+PZ3SMIz6QTQTLWP6PBijZuzYV8jEz9YDcPOIdrRomppkiwzDMKIjmiGyEUHbzXHrhS0AzMEkgP/30VcUFpdx7tDuDO6UbGsMwzCiJxnPgzGiZO2uA0zK2kyKwC/GDiZv8+pkm2QYhhE1CX8ejBE9D01bTWmZcsWIIxjQ1Z7xYhhG/SJZz4MxqmHhplymLttOsyYp3DZmULLNMQzDqDFJeR6MUTWqyoPvrwLgB6f2pXu75km2yDAMo+ZEdDAiMgDoFngeTJB8tIg0U9W1cbeukfLJl7uYsy6Hdi3S+Mnp/ZNtjmEYRq2oKgbz/4D9YeQFfp8RB8pUefB9F8y/+cz+tGuRlmSLDMMwakdVDqaPqi4JFfrHFPeJm0WNnM82FbJy2z56tGvO+JP6JNscwzCMWlOVg6lq4N+ecBUHikpKeXXZAQBuP2cQzdNsUqVhGPWXqhzMfBH5UahQRG4AsuNnUuPllbmb2JlfyqBurbn0hF7JNscwDOOwqCqL7HbgvyJyNRUOJRNoCnw73oY1NgoOlfLPmS5v4hffGkxqii3FbxhG/Saig1HVHcDJInImMMyL31XVGQmxrJHx6rxN7D5QRP8OTThniK2WbBhG/SeapWJmAjMTYEujpbC4lH994novlw9pbQ8SMwyjQVCbpWKMGPPavE3s3F/EkPS2ZKbbg8QMw2gYmINJMoXFpTzhey+3jhlovRfDMBoM5mCSzBtZm9mxr4ijurfhWxZ7MQyjAWEOJokUlZTy+Meu93LbmIGkWOaYYRgNCHMwSeSNrC1syytkcLc2jB3aPdnmGIZhxJSkORgRSRWRhSLyjv/cUUQ+FJGv/HuHoGMniMgaEVktImOD5BkistTve0R8AENEmonI614+V0T6JLp+1VFcpjzhey+3jBlgvRfDMBocyezB3AasDPp8FzBdVQcC0/1nRGQIcCUwFDgXeFxEAmuoPAHciHsA2kC/H+AGIFdVBwD/AB6Mb1VqzscbCvh6bwEDu7bm/GHpyTbHMAwj5iTFwYhIL+AC4N9B4nHA8377eeCSIPlrqlqkquuBNcBIEUkH2qrqbFVV4IWQcwJlTQbGSB1KzyouLePNlQcBuMViL4ZhNFDE3ZsTrFRkMvBnoA3wC1W9UET2qmr7oGNyVbWDiDwGzFHVl7x8IjAV2AA8oKpne/lo4E5f1jLg3MCD0URkLTBKVXeH2HEjrgdEenp6xpQpU2pdp/z8fFq2bBmV/KP1+TyRtY+ebVL5x9jOpAb5vkjl1FRHLOWmu37rMN2J150IHYnQHQ2ZmZnZqpoZdqeqJvQFXAg87rfPAN7x23tDjsv17/8ErgmSTwQuBUYAHwXJRwNT/PZyoFfQvrVAp6rsysjI0MMhKysrKnlxSamOfnCG9r7zHf3fwi1Rl1MTHbGWm+76rcN0N0wdidAdDUCWRrivRvPI5FhzCnCxiJyPeyRAWxF5CdghIumqus0Pf+30x28Bjgg6vxew1ct7hZEHn7NFRJoA7YCceFWoJry7dBubcvLp3jqVC4/tkWxzDMMw4kbCYzCqOkFVe6lqH1zwfoaqXgO8DVznD7sOeMtvvw1c6TPD+uKC+fNUdRuwX0RO9PGV8SHnBMq6zOtI/FhgCKoVmWOXDG5lKyYbhtGgSUYPJhIPAJP882Y2AZcDqOpyEZkErABKgJtVtdSfcxPwHO4BaFP9C9ww2osisgbXc7kyUZWoipmrd7Jq+366tW3GGb3tmW2GYTRskupgVPVj4GO/vQcYE+G4+4H7w8izqHiUQLC8EO+g6hKP++e9/PDUfqSl5ibZGsMwjPhiM/kTxLz1OWRtzKVdizSuGnVkss0xDMOIO+ZgEsQTH68B4LqT+9C6WV0amTQMw4gP5mASwIqt+5i5ehct0lL5/sl9km2OYRhGQjAHkwACz3u5cuQRdGzVNMnWGIZhJAYbq4kz2w6U8O6S3aSlCj8a3S/Z5hiGYSQM68HEmbdX
H6RM4ZLjetKjvaUmG4bReDAHE0d27itkxoYCROAnZ/RPtjmGYRgJxRxMHJn42XpKyuDcod3p36V1ss0xDMNIKOZg4sTBohJenrsJgJus92IYRiPEHEycyNqYy4GiEgZ0SOPYXu2rP8EwDKOBYQ4mTsxf7xZvHtIlLcmWGIZhJAdzMHFi/oaAg7F5L4ZhNE7MwcSBQyVlLNq8F4DBnczBGIbRODEHEweWfp1HUUkZA7q2pm0z+4oNw2ic2N0vDmT54bERfTom2RLDMIzkYQ4mDswvdzAdkmyJYRhG8jAHE2PKypSsje5hYtaDMQyjMWMOJsas3XWAvfnFdG/bnF4dbO0xwzAaL+ZgYsw8PzyW2acDIpJkawzDMJKHOZgYk7XBDY+N7GvDY4ZhNG7MwcSYQIA/s7c5GMMwGjfmYGLItrwCtuQW0KZ5EwZ3b5NscwzDMJKKOZgYMt8Pj2X07kBqisVfDMNo3JiDiSGBBS4tPdkwDMMcTEyZbzP4DcMwyjEHEyMOHipj9Y79NE1N4dhe7ZJtjmEYRtIxBxMjVu0pRhWO7dWO5mmpyTbHMAwj6ZiDiRGrdh8CINOGxwzDMABzMDFjpXcwtsClYRiGI+EORkSOEJGZIrJSRJaLyG1e3lFEPhSRr/x7h6BzJojIGhFZLSJjg+QZIrLU73tE/NosItJMRF738rki0ieedSosLuWrnGLAJlgahmEESEYPpgT4uaoeDZwI3CwiQ4C7gOmqOhCY7j/j910JDAXOBR4XkUCQ4wngRmCgf53r5TcAuao6APgH8GA8K7Ts6zxKymBwtza0a5kWT1WGYRj1hoQ7GFXdpqoL/PZ+YCXQExgHPO8Pex64xG+PA15T1SJVXQ+sAUaKSDrQVlVnq6oCL4ScEyhrMjBG4rjyZGCByxF9bXjMMAwjQFJjMH7o6nhgLtBNVbeBc0JAV39YT2Bz0GlbvKyn3w6VVzpHVUuAPKBTPOoAFQtc2vwXwzCMCsQ1/pOgWKQ18Alwv6q+KSJ7VbV90P5cVe0gIv8EZqvqS14+EXgP2AT8WVXP9vLRwK9U9SIRWQ6MVdUtft9aYKSq7gmx4UbcEBvp6ekZU6ZMqXE9ylT5/ls7OVis/OuCLnRpWTlFOT8/n5YtW37jvJrKY1mW6a47uhOhw3QnXncidCRCdzRkZmZmq2pm2J2qmvAXkAZMA+4Ikq0G0v12OrDab08AJgQdNw04yR+zKkh+FfBk8DF+uwmwG+9MI70yMjK0Nqzclqe973xHM/7wXtj9WVlZMZHHsizTXXd0J0KH6W6YOhKhOxqALI1wX01GFpkAE4GVqvr3oF1vA9f57euAt4LkV/rMsL64YP48dcNo+0XkRF/m+JBzAmVdBszwX0TMCSxweXTnpvEo3jAMo97SJAk6TwGuBZaKyCIv+zXwADBJRG7ADX9dDqCqy0VkErACl4F2s6qW+vNuAp4DWgBT/QucA3tRRNYAObgstLhwyXE9OKJDC7ZtXBsvFYZhGPWShDsYVf0MiJTRNSbCOfcD94eRZwHDwsgL8Q4q3rRpnsYZg7uSfWBz9QcbhmE0Imwmv2EYhhEXzMEYhmEYccEcjGEYhhEXzMEYhmEYccEcjGEYhhEXzMEYhmEYccEcjGEYhhEXkrYWWV1DRHYBGw+jiM64JWniJU+EDtPdMHWY7oapIxG6o6G3qnYJuyfSGjL2qvH6amHX44mVPBE6THfD1GG6G6aOROg+3JcNkRmGYRhxwRyMYRiGERfMwcSOp+IsT4QO090wdZjuhqkjEboPCwvyG4ZhGHHBejCGYRhGXDAHYxiGYcQFczBGzBGR7sm2wTCM5GMOJkaISLqINKvhOS/699tqoa+DiIwUkdMCr2qO7x7yuUp7w+2rQf3ei+agcPWu6rsQkctFpI3f/q2IvCkiJ0RpU51ARFJF5KUk6j8lkuwwf/OkICKtkm1DVYjjiFqc1zzK4zqKyK9F5A4RaVvFcS1EZHBN7Ths4jG5pjG+
gI+A9cDjuEc2T/XyIcANEc5ZAWQCi4EOQMfgVxW6fggsBXKBmUABMMPv6wZc6F9dg855N4K9DwEnA98Dxge9FoTR+w1ZBPsWhnz+C9AWSAOm42YMXxNBx8Iqyl3i308FPgXGAXO97BSgld++Bvg70BsY5HUu8/uOBX7rt8PVO6yt/vjb/D7xv/ECYBEwA5hcg2tlGtA0jDxc+d+qpqwXw8mA/wAXACnV/YYBWXW/OfBUBBvahl63/nu+GPhO4BV0fYb+P37sf4dfA78PvPz+NOBWYLJ/3eJlJ+P+P5v8ccNx/72wv3fIb1z+qua7jVRWuDrc4LenhylnX4TyWwK/A572nwcCF/rtNcDnuEfJnw+0Czov+LpdAbyNu96XA/3C6LkIWA2s95+P8+dE1B+rl2WRxRAREWAW8CjwG1UdLiJNcDfNY8IcfyvuUdBNga+DdvXx7wciqNoIjADmqOpxInIU8AfcTeWvwMe4m9Ro4JeqOrkKe9/CLROxCCjFXXQtcRfh96h4vHVb4F9AM0CBXao6KkK5P1XVx4M+L/J2fhu4BOcc/gaU+e0AbbzN+eGKBVqqaqqI/BlYqqqviMhCVT1eRJbgbjLH4m6wE3E3NoBfAk+q6vHenmXAQqB/UL3x9TotxNafATP9b7nYv48Fbsb9OV/G3QBKVfVrX/5+X1a4OijwKnAC7k9+MGj/dWHKfxd38yhHVf/u9VwD3KGq5b04f70twd2UrwdOBN7w9ewJ3A78I6i4trjHi48HXiLMb66qR/myFwa+Q//5x8C9uAZOoL6Ku/6Oxd3wyirM1h+IyFTgWSr/P3Jxvd5sKn4LVPVvIvJvnEN53ouv9cccA1wGvB3yu+4h/O89M6jOzXGPZz/C2x6JhRHK2hymDouA07yeM0K+wwXAWFWdH1y4iLzu6zxeVYeJSAtgtqoe5/cfifs/nIK7xvbiGpbB1+1VwCuqequ/bv7tj/s58ENV/a6IZANnAR8H1WMJsLIq/bGgSawKMtw/SESaq+okEZngZSUiUhrh+EeAR0TkCdzNOzDMNUtVF4vIvcB23A1TgKtxN+HLVbVQRBCRZqq6ynd/fwOMUNWdACLSBddTCetgvL0DgSHqWxoich3wfaAXlW9s+4Ffq+qbUXwPj4eI0vz7+bib60pgl3/9LUTHElUtCVeuiLwjIk8CZwMP+uGbwDBvia/POOBhVZ3o61KqqvOcLy2nBNdzLK93kI7lwbaqak7QuRK071n/Gw3E3VB3AaN8/dtE+GoCOu4Gtnrbg48NV35qyDGBMiYAdwNNRGRf0K5iXE/jI+AjEWmHuwk9AhT6stpRcePfh7t5P0SE3zzo884QM34BDFXVSmtYicgKVR0Sofqdw/w/mqjqFRGOH6Gqw4M+zxCRxUChqm4O+V1LcY2Qb/zeqnpLiI3tcL2/i6v5n4W7dsLVoT3uZt0D51AC7AOKgDkisgHXoAg0NA6p6hUicpUvp8A3+hCRXjjHMhrXcFoOfIZrCAT/X0/A/2aqOs07pR44p700qP55IfUA6B9Jf6wwBxN7DopIJ3yLTkROBPKqOWcVrvX4Ju7ie1FEnsa1eoJ7CU+IyFxgi7+g/wd8KCK5uBvWEQHn4tlD9XG2ZUB3YBuAqj4PPC8il6rqf6qvblRMEZFVuNbiT4H2wB5VPamG5XwXOBd4SFX3ikg6roUJsN//4a8BTvM35jRgh4j0p+L3uAxX1wME1TuSrd5JF/p92SLyAdAXmODjQctUNSO4ABHpWFUlVPUP/rg27qMe8J/7hCl/a+D4kDL+DPzZ9+b+ghvOCYzbB+raCdfivwaYg+ttnQecrqpnhBT5lyh+89AFDdcSvrc5W0SGqOqKMPvC/T9yReQYVV0a5vhSEemvqmv98f1wjmSziJwMqIg0xfXYVgLtIvzeoeTjhoQg8v9sd4SyWoSpw1pVPV1EblHVR4MViUhv3BD4aC+ahetlvOp7DYFy+uOcEcAmYD7wJ1X9SVBZJ1D5
uv0BbgQEcBcTFaMhgd9mmYh8D0j1DaJbgS+AY6vQHxNsiCzG+AvgUWAY7ubdBbhMVZdUcc4S4CRVPeg/twJm426C/wRew10EVwE3q+rJQeeejmuRvg/ch2vtvOp3X4HrEdxZhe6ZuOGweQRdXL5ldwEwlIobF6p6b1RfxDf1dMCNRZeKyBe44auvqDyUJE6FRgxWVlF+d9zwznxV/dS35M7Atfqewo1b5/q6LMA1riLVO9jWlkBbVd0uIin+nDTcUGFnoGeYG8p6X6/g1mDgs+JiEy/iYhbg4jzjcTfI44B13oF28uVXde38CHfD6IUbNjkRd+3sBY7yep5V1e3++JlABpAVXI6qnuUbLb+noif9CXCvqub5c0OHyI7HDRXNpfKNaTIwBdcrKKLidz026P8xFNcq7wKkAkcC68IcfxbwnN8Hbvj4elzr/GFcb1aAD3AxrHZU/r3X4xzso1Rca6nA0cAkVb3LX4/f+J/580LLuhroRIT/uIiMD/2NgG/h/peBBuQlwNO4huVvcTGcD3A9lu+r6sciMhwXazzNfze9cA3GXCJct2H0AuCv4d94O8DFAP/oyw6rP1JZNcUcTBwQNyY7GHcxrVbV4mqOX4obCij0n5vjWi8X4f5Ep+Au/M+B21V1Q4RyHsT92U/1umcBJ1bjYE6PsOsqXCzmTNy47mXAPFW9oaq6hJR9lqrOEJHvhNsfzXDb4eKH0S7D3Zg64lqSCrwQwaZPRGQY7k8X7FhfEJEf4m5ilW7mqnpWFfo74lrKwVlBf8aN38/0x4wHfgL8XwSbFoST+3OXEj4e97yvwym44bDPgCdwN/YAzYFLcUMovxKR/+BumMHxjuGq+h2vKzS2Ns+Xu5SKITdwsaM7QuWqutFf2/8HjMUNwc3GxQFbEtLC98dfjrsh9sEldZzsv7uI34m3rRUuwWG//xx8nZcAG1V1i9/XhzD/M2Czb2SUl+V7xrfiHMw3/uMiEtzYCMR6OuIaCqENyPtwSQQn+nLmBA83ikhr3H95NHCDt+3KcPVV1U+q+C4ycQ6mDxWjVkpFrKhcP9BGVddHKqummIOJA77r3oegIUhVDXtD88ffAVwH/NeLLgGeU9X/V0O9CzQo4OtlS1T12JqUE3xe0Htr4E1V/Va1J1eU8QdVvVtEnvWiwMUWaKH+oKZ2hdHxmaqeKt8MrAd6C1/gWvMLCAog4zLsKjle76DzcX+8IbjA83nAZ6p6WaSbeaT4QQSH9AVuDH940HFP4eI3OWGK0Woc2HxVHSEii4BRqlrkt7/Ejf+/7A+9CuigqpeHKeMTP7yzSEMCvOFkQfu+CO5NB8lnRLJZRCaFsWsE7r9SqYWvqo8GXX+nAn/y52YCkyJ8Jb/EOc0+VP7/3Ssi3bwucI2l0JhSqK2bcCMDr+OyNANDSR+HGWKMVEY7XFJA1zANyFxVDTu9QESycL3kL3BOfJZ3uH2BbUFltQC6RWp0+mNW4+Jly6jcEHgFOE9V9/njjgbeUNVh0dQtGiwGE2PEzW0Jl50U0cGo6t9F5GMqeh7Xq+pCceP/P+Kbf5ZKN2YRuQkX2+jnh9sCtMG1xsLZWd2NeaX/nC8iPXDd876Rax62Xnf7zZv45p8+Ji0bVT3Vv4cNrIvIMlU9N4x8ARDaszsPV//huMy/6/1N6d9+f6GGT66IxG1UOKQzg3oX+0Tkd7jhK3Dj7V1V9ZKoKl2ZSPG4wVo5OD5TRBZL5fhQCu5mHZgjVSAip6rqZ0BgfkxVWVYzReRG3HBY8BDZKhF5JVTue6zh7CrAOd1AC/9BXAv/USr+QxfgEmHScBlk2RFsegsX88wO1i0i36VyhuWjIvJLVZ3sb/g3EDIcjOuhXIQbLpsoIu/ghtE+F5HHcI6nPAswQq8qH/cdzhWR4AbkRKCNiPwiTDk5uBv/rjDlvYHrxQUo9bIRYY4NsEtVp4QKReRPuJjj+bjh1BdwQ4AxwxxM7AmbnVQd/uIMvUDfwqXxfkTl1nco
rwBTcUMvdwXJ9/uLNZy+6m7Mv/M3rr96u5SKG21N+R8VvYhAwDxRXecvJCiAHIUzHqyqZSJSIm7i2k6gnz8m0s08EpUcEm6YYjDuj9yHihb7J7i4Qo17v6r6bb95j4+vBOJxT4nIiao6x5c7ytcvm4rvvgTYgLu5gmsIPO9b3eDG+6+ron7f8+8Tgk3y9SmiYsw/IH8TWBjGrnwqX9+lVMSvvpagzEHczfoOdcko38A7jXANisVEzrB8ERcPGYtLu74aWKmqBbie0iRxcbmHfd1m+WKD45EKnCUiU6j4flNwPeFnfTmhDcjAUNTNIeX0Aw6JyN8JiYcBTVT1UPnBqofEJTlUxd3i0r2nE+LwRSQN+BB3/V+iql9VU1aNsCGyGCMibwC3qmq4zJWalhVxeCKR+Jtjc/XB3lqcvyyW3e4odS7F/Vmb4GIggQByKu6Pv4gwzlhEHsel5l6Jm0twAFikqteHlF+eXBH8hw855r84x3E7bh7C8bihkU642FagtxjgYcL0flX11lrUOw3nzDb5z71xk/JG4BzsqV7+KfCEd4SBeFV/XKZfntdfq8SOGti1A+fIvzFELC5AfS5u3tNX4jIHj8HNUbmTkFgZboLioxqSkSYiSzVoLpq4hI3FqnqMBM2l8sNxacA0dYkPp+OSZc7D/XavaxWZdlJFrKcmSIR4GM4RPKqqb/vjxuHuN2OqKOslXA8lMC9pFO67n+4POQv3/9gAUJPrrdp6mIOJDUEtlzbUMMujijLvA75Q1aiWXok1NW1NV1HOU4T508cTcamh4WiNcxr7I+x/GNdC/RTX22qrVWRx1cCe03Et49G4FmrwxNqAoymiFr3fED2R6h3gr0SIzYjI+4SJV6lq8FylapM3cNf+o1QEzT/DpVJvr8KuTgQlp6jqwqoqIS6d+3VcbOEnuJ7WLtyQVnCDIvDdvk+EDEsRmaeqI0VkFs75bvd1EJyzn4Sb0BkYwuuGiwf1UNXzRGQILgt0YoiNnXHp+GF/T+887wCOVNUbxaUQD1bVdyLFw3BDzS/j5roILr4zXlXXVPFdhTrXqnqlgakKMcEcTIzwNxDBdeN/FbwLeFAjzHqvpsz9QCvcH6WYiuB4jdN4a6E7bCyplq3p0F5EeRpqTI2OzqZ3VPVCiZxK/EMqMnf64eo/S1UfjqENT6jqTWHkMev9VqF7cUgMpFwWbU9TqknewM2Qf4WKGNM1wNWqek5sagEikq2qGRKUxCIin+DSvcPNORmHuxmPpsKJ/def90PcKhjH4FKiW+My4V4NBMBDdIdbjWAVsAWXqPFHX/fOuN7yeFV9P0w5EWfyi8hs3CocwfGwh9TPHROXdCPqs+Sq+a6eBv6h4eclxRVzMDFGYpjJ5c/9RpqrVpGSGCtEZCVxbk2r6sbaln24eAc6C/hUVVeF7EvFDSWdiWsdF6hfLiVOtsS891uFrudwy78Ex0CuU9Wf1rSn6YPj4ZI3vhOu9R3L4V4RmaOqJ4rINNwqBVtx8ZRHcI2E0Dkn3XDDnguAZ3BDYIGssGZB9QisOqG4Xle44P8x6jL3yucFiUi+1xWYh3Oeqs4Rl9jxqgbNHwqqQ5aqZoaUE3D2x+GGx9oFnXIxLhb1DdQvHxThu1qJayyux11XR+KGKEOHaANlxazhZ0H+GBFF8Lg2ZUZKc4043hpDKs3wrw3JdCBR8Cyup/KouNnhC3HDYhfjeo2z/efywHAceYiK3m9wJllAdtiExEDGi0vBDY7NgPs+vu97d9H0NCMlb+wWt05aYDjqKlwWYiy5zycj/Bw3HNcWt27cfbi5X5Uy0nxs5Xe4xIPrgcfEpUxPxE2y/EbmGRGC/4RfjaBMVT/wn+8NOHB1mYaR6nBIIs+kX4lzcIF42ADctVGbpVxCkx664IYT4471YGKEv9g7UINMrijKrNG8i1iQyNZ0sgnXU8Fl42Xg6vw5rpcz22cUxduemPZ+Q8qptjcZ6ZhIDYVIQ2riVlF4DDiJirlI
tyWiwSERJi0HYhDiZshfj7vpzsQ12nqpaugyOEQK/uPiPqEz+UvUr78W+juG+129/Bwiz+SPKh5WASNwWgAABZBJREFU17EeTIxQl2GVh2utxYqazruIBXFvTdcFRGQ6VfRU/Bj39bieTnfcpLd42RLz3m8o0dzca+EAKqWAQ7nT/lO8GyISYY4YfukaCZlzIm7l8utwy/L8GxffKBaXTbY3tB6ewAoce8Wt7rAd6KOqC3zMtXwmP1AobtFRwa1VFojdCJWH14IZj1stezIuPnmbVszk76Xh062jmhtXFVLNat+xjPGag6nb1HTexWETiO+ISFporMd35xsKS3A9lWG4hsFeH1i9ARcIzsA9FuEZKj9SIB7UeB5TMglJ3rheREIztraJSFONkL4dI8LOEVPV/0j4Scv34mJD5U40qB47gQVh6vGIuPkvv8U9WiEQ/AcYScVN/gSvp6YZloFh2nPwCSUiEkgo+YbzrqreNUGrWe07ltgQWT1Boph3ESM95a1p3Gq5AdoAn6vqNfHSnQyCeiq/wPVUfosbFsvWCI8NaOxUN9yGm0f0jefdVBWIroUNh500EEU9thM++D+Qw8ywDLIhbEKJiKzAxV0qxcNwsZ6kz42LFnMwRiXiEUuqi4jI/1G5pxLIKJuRVMPqMSLyoqpeKyJ7qfxQM4DyxxTESFfc54j5OEgg+B/cW/ghh5lh6csPHab9TCtWGojk/H5EEufG1RRzMEajRER+ifVUYopvdZ+HW4PsjND9sWighMQPWuNa94HfL6bxgyqSGGIyX0lE/kGUCSWJrHcsMQdjGEZM8IH0m4C+VI4VBoLH/cKeWDtdL+Ja/Z+q6srqjq+ljkrzguKVYRk6TKuqERNKElHvWGIOxjCMmCIRViqIsY6zqLziwkLcTfewV1yQyCtQtPby3cRgtY7aDNPGs97xwByMYRj1knituBBF8P+/sZivVNth2njVOx6YgzEMo95RVYA8jjqTnmGZjHofDjYPxjCM+kjYeUxxXnGhLsxXSka9a431YAzDqLfUJEDekKgv9bYejGEY9Y4wAfJErLiQdOpbvc3BGIZRH2kB/J3GN4+pXtXbhsgMwzCMuJCSbAMMwzCMhok5GMMwDCMumIMx/n979w9yVR3Hcfz9XsREKMylQX0GS0iICI1CEFJpcjEcdNIphxzSNREHB4egofAfLYEQogSFky4Pomg8IQ+p+RcRBBt8hsiIFOzbcH43LtcQfLjHCj+v5Z5zzz3n/M6Fy5fv79zz/UYP1E/Uy+qP6rRda+K+zjWprujr+BGzlZv8EWOmvgusB96qqgfqQmDOvzysiGcuGUzE+L0CzFTVA4Cqmqmqu+pudUq9pB62NWtvGchn6mn1irpS/Ua90crSo06oV9WvWlZ0XJ03emL1ffWcekE91p6XQN2n/tT2/fQZfhfxHEuAiRi/k8Ai9bq6vzWLA/iiqla2EvAv0GU5Aw+rajVwkK5r4Ud0T2tvVV9un1kGHG41r36lK1vyt5Yp7QLWtVpZPwA71QXABmB523dvD9cc8ZgEmIgxq6rf6B6E+xC4BxxVtwLvqd+3ar1rgOVDu33XXi8Cl6vq55YB3QIWtW13qupsWz5CV1V32DvA68BZdZquB/0SumD0B/Cl+gHw+9guNuIJcg8mogdV9QiYBCZbQNkGvAGsqKo76h5g7tAug74ifw4tD9YHv9PRh9ZG1wVOVdXm0fGobwNrgU3AdroAF9GrZDARY6YuU18deutN4Fpbnmn3RTbO4tCL2x8IADYDZ0a2nwdWqUvbOOapr7Xzvdja7H7cxhPRu2QwEeM3H/hcfYmure1NuumyX+imwG4DU7M47hVgi3oIuAEcGN5YVffaVNzX6qD44S7gPvCtOpcuy9kxi3NHPLWUion4H1AngBP/1CM+4r8qU2QREdGLZDAREdGLZDAREdGLBJiIiOhFAkxERPQiASYiInqRABMREb1IgImIiF78BTKqZ/q+LF3sAAAA
AElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x11c3af11ac0>"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fdist1.plot(50, cumulative=True)  # 前50高频词累计频率图"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Distinct tokens of text1 that are longer than 15 characters.\n",
    "# Iterating a set of strings has no stable order across kernel sessions, so the\n",
    "# result is sorted to make the 20-word preview below reproducible.\n",
    "long_words = sorted(w for w in set(text1) if len(w) > 15)\n",
    "long_words[:20]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('more', 'is'), ('is', 'than'), ('than', 'done')]"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Adjacent word pairs (bigrams) of a small token list.\n",
    "# In NLTK 3 bigrams() returns a lazy iterator; materialise it once so that `two`\n",
    "# is still usable after it has been displayed instead of being left exhausted.\n",
    "two = list(bigrams(['more', 'is', 'than', 'done']))\n",
    "two"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "United States; fellow citizens; four years; years ago; Federal\n",
      "Government; General Government; American people; Vice President; God\n",
      "bless; Chief Justice; Old World; Almighty God; Fellow citizens; Chief\n",
      "Magistrate; every citizen; one another; fellow Americans; Indian\n",
      "tribes; public debt; foreign nations\n"
     ]
    }
   ],
   "source": [
    "text4.collocations()  # print the high-frequency two-word collocations found in text4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys([1, 4, 2, 6, 8, 9, 11, 5, 7, 3, 10, 12, 13, 14, 16, 15, 17, 18, 20])"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Frequency distribution of token lengths in text1. It gets its own names so that\n",
    "# later cells which rebind the generic name `fdist` cannot change what it refers to.\n",
    "word_lengths = [len(w) for w in text1]  # length of every token, in text order\n",
    "length_fdist = FreqDist(word_lengths)  # maps token length -> number of tokens\n",
    "length_fdist.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'dict_items'>\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[(3, 50223),\n",
       " (1, 47933),\n",
       " (4, 42345),\n",
       " (2, 38513),\n",
       " (5, 26597),\n",
       " (6, 17111),\n",
       " (7, 14399),\n",
       " (8, 9966),\n",
       " (9, 6428),\n",
       " (10, 3528),\n",
       " (11, 1873),\n",
       " (12, 1053),\n",
       " (13, 567),\n",
       " (14, 177),\n",
       " (15, 70),\n",
       " (16, 22),\n",
       " (17, 12),\n",
       " (18, 1),\n",
       " (20, 1)]"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "length_items = length_fdist.items()  # all (length, count) pairs as a dict view\n",
    "print(type(length_items))\n",
    "length_fdist.most_common()  # every (length, count) pair, highest count first"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['I', 'am', 'hnuer', '.', 'and', 'you', '?']\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "FreqDist({'I': 1, 'am': 1, 'hnuer': 1, '.': 1, 'and': 1, 'you': 1, '?': 1})"
      ]
     },
     "execution_count": 106,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Toy example: a frequency distribution over seven distinct hand-written tokens.\n",
    "sample_tokens = [\"I\", \"am\", \"hnuer\", \".\", \"and\", \"you\", \"?\"]\n",
    "print(sample_tokens)\n",
    "fdist = FreqDist(sample_tokens)  # later cells read `fdist`\n",
    "fdist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.14285714285714285\n",
      "7 7\n"
     ]
    }
   ],
   "source": [
    "print(fdist.freq('I'))  # relative frequency of 'I': its count divided by the total (1/7 here)\n",
    "print(fdist.N(), len(fdist))  # total number of samples counted, then number of distinct samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    I    am hnuer     .   and   you     ? \n",
      "    1     1     1     1     1     1     1 \n"
     ]
    }
   ],
   "source": [
    "fdist.tabulate()  # print the distribution as a table: samples on the first row, counts below"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
